#ifndef LWPF_SHARED_CPE_
#define LWPF_SHARED_CPE_
#include <qthread_slave.h>
#include "dma_macros_new.h"
/*
 * Counter array used by lwpf_start()/lwpf_stop() and by the two
 * lwpf_sync_counters_* helpers below; it is indexed by kernel id.
 * Declared with an unknown bound here; the definition appears in the
 * __sw_slave__ section of this header.
 */
extern __thread_local LWPF_PCVEC lwpf_local_counter[];
/*
 * Attribute shorthands for the definitions emitted by this header:
 *   LWPF_WEAK_DATA - weak symbol placed in the ".data" section;
 *   LWPF_WEAK      - weak symbol.
 * NOTE(review): the C and C++ branches are currently identical; the split
 * looks like a hook for language-specific attributes - confirm before
 * collapsing it.
 */
#ifndef __cplusplus
#define LWPF_WEAK_DATA __attribute__((weak)) __attribute__((section(".data")))
#define LWPF_WEAK __attribute__((weak))
#else
#define LWPF_WEAK_DATA __attribute__((weak)) __attribute__((section(".data")))
#define LWPF_WEAK __attribute__((weak))
#endif

/*
 * Issue a pe_get() transfer between lwpf_cpe_counter and lwpf_local_counter
 * and wait for it with dma_syn().
 *
 * lwpf_cpe_counter: first argument handed to pe_get(); lwpf_enter() passes
 *                   lwpf_global_counter_<unit>[_MYID] here.
 * kernel_count:     number of LWPF_PCVEC entries to move; the transfer size
 *                   is sizeof(LWPF_PCVEC) * kernel_count bytes.
 *
 * NOTE(review): the direction (presumably lwpf_cpe_counter -> local counters,
 * per the "get"/"m2c" naming) and the exact semantics of dma_init/pe_get/
 * dma_syn are defined in dma_macros_new.h - confirm there.
 */
static void lwpf_sync_counters_m2c(void *lwpf_cpe_counter, int kernel_count) {
  // Not referenced by name in this function; presumably consumed by the
  // dma_*/pe_* macros below - check dma_macros_new.h before removing.
  volatile int reply = 0;
  dma_init();
  pe_get(lwpf_cpe_counter, lwpf_local_counter, sizeof(LWPF_PCVEC) * kernel_count);
  dma_syn();
}

/*
 * Issue a pe_put() transfer between lwpf_cpe_counter and lwpf_local_counter
 * and wait for it with dma_syn().
 *
 * lwpf_cpe_counter: first argument handed to pe_put(); lwpf_exit() passes
 *                   lwpf_global_counter_<unit>[_MYID] here.
 * kernel_count:     number of LWPF_PCVEC entries to move; the transfer size
 *                   is sizeof(LWPF_PCVEC) * kernel_count bytes.
 *
 * NOTE(review): the direction (presumably local counters -> lwpf_cpe_counter,
 * per the "put"/"c2m" naming) and the exact semantics of dma_init/pe_put/
 * dma_syn are defined in dma_macros_new.h - confirm there.
 */
static void lwpf_sync_counters_c2m(void *lwpf_cpe_counter, int kernel_count) {
  // Disabled alternative that looked the counter array up by name instead of
  // using the extern declaration directly:
  //void *local_counter;
  //weak_local(local_counter, "lwpf_local_counter");
  // Not referenced by name in this function; presumably consumed by the
  // dma_*/pe_* macros below - check dma_macros_new.h before removing.
  volatile int reply = 0;
  dma_init();
  pe_put(lwpf_cpe_counter, lwpf_local_counter, sizeof(LWPF_PCVEC) * kernel_count);
  dma_syn();
}

/*
 * lwpf_enter(x): begin profiling for unit x on the calling CPE.
 * Expands to a brace block that
 *   1. calls lwpf_sync_counters_m2c() with this CPE's row of
 *      lwpf_global_counter_x (selected by _MYID) and lwpf_kernel_count_x;
 *   2. calls config_pcrs() with lwpf_evt_config_x.evt.
 * The expansion is a bare { ... } block, not do { } while (0), so writing
 * "lwpf_enter(x);" as the unbraced body of an if that has an else will not
 * compile.
 */
#define lwpf_enter(x)                                                              \
  {                                                                                \
    lwpf_sync_counters_m2c(lwpf_global_counter_##x[_MYID], lwpf_kernel_count_##x); \
    config_pcrs(lwpf_evt_config_##x.evt);                                          \
  }

/*
 * lwpf_exit(x): end profiling for unit x on the calling CPE by calling
 * lwpf_sync_counters_c2m() with this CPE's row of lwpf_global_counter_x
 * (selected by _MYID) and lwpf_kernel_count_x.
 * The expansion already ends in ';', so "lwpf_exit(x);" produces an extra
 * empty statement; the init function in this header relies on that trailing
 * ';' being present.
 */
#define lwpf_exit(x) lwpf_sync_counters_c2m(lwpf_global_counter_##x[_MYID], lwpf_kernel_count_##x);

/*
 * lwpf_start(kernel): update lwpf_local_counter[kernel] at the start of a
 * measured region.
 * Asm operands: %0 = lwpf_local_counter[kernel] (output), %1 = scratch vector
 * cntrs (early-clobber output), %2 = lwpf_local_counter[kernel] (input tied
 * to %0). read_pcrs("%1") emits the instructions that load cntrs, then
 * "vsubl %2, %1, %0" combines it with the stored counter.
 * NOTE(review): presumably this computes counter = counter - current PCR
 * values, so that the matching lwpf_stop() leaves the elapsed counts
 * accumulated - confirm against the vsubl operand order and read_pcrs.
 * `kernel` appears twice in the expansion; pass an expression without side
 * effects.
 */
#define lwpf_start(kernel)                                                  \
  {                                                                         \
    LWPF_PCVEC cntrs;                                                       \
    asm volatile(read_pcrs("%1") "vsubl %2, %1, %0\n\t"                     \
                 : "=" PCVEC(lwpf_local_counter[kernel]), "=&" PCVEC(cntrs) \
                 : "0"(lwpf_local_counter[kernel]));                        \
  }
/*
 * lwpf_stop(kernel): update lwpf_local_counter[kernel] at the end of a
 * measured region.
 * Asm operands: %0 = lwpf_local_counter[kernel] (output), %1 = scratch vector
 * cntrs (early-clobber output), %2 = lwpf_local_counter[kernel] (input tied
 * to %0). read_pcrs("%1") emits the instructions that load cntrs, then
 * "vaddl %2, %1, %0" combines it with the stored counter.
 * NOTE(review): presumably this computes counter = counter + current PCR
 * values, completing the subtraction done by lwpf_start() - confirm against
 * the vaddl operand order and read_pcrs.
 * `kernel` appears twice in the expansion; pass an expression without side
 * effects.
 */
#define lwpf_stop(kernel)                                                   \
  {                                                                         \
    LWPF_PCVEC cntrs;                                                       \
    asm volatile(read_pcrs("%1") "vaddl %2, %1, %0\n\t"                     \
                 : "=" PCVEC(lwpf_local_counter[kernel]), "=&" PCVEC(cntrs) \
                 : "0"(lwpf_local_counter[kernel]));                        \
  }

#endif

#ifdef __sw_slave__

/*
 * Kernel id enum for the current unit.
 * LWPF_KERNELS and LWPF_UNIT are not defined in this header; the code below
 * expects LWPF_KERNELS to be a list of K(name) entries and LWPF_UNIT to
 * expand through U(<unit>).
 * With K(x) = "x," every kernel name becomes an enumerator; the trailing
 * enumerator is LWPF_KERNELS_END_<unit>, whose value is therefore the number
 * of kernels. The typedef name is lwpf_kernel_<unit>.
 */
#define K(x) x,
typedef enum {
  LWPF_KERNELS
  // U is redefined between uses so the same LWPF_UNIT token yields first the
  // sentinel enumerator and then the typedef name.
#define U(x) LWPF_KERNELS_END_##x
      LWPF_UNIT
#undef U
#define U(x) lwpf_kernel_##x
} LWPF_UNIT;
#undef U
#undef K

/*
 * lwpf_kernel_names_<unit>: weak array of kernel names as strings.
 * With K(x) = "#x," each LWPF_KERNELS entry is stringified; the list is
 * terminated by the literal "LWPF_KERNELS_END", so the array has one more
 * element than there are kernels.
 */
#define K(x) #x,
#define U(x) lwpf_kernel_names_##x
LWPF_WEAK const char *LWPF_UNIT[] = {LWPF_KERNELS "LWPF_KERNELS_END"};
#undef K
#undef U

/*
 * lwpf_kernel_count_<unit>: weak constant initialised to
 * LWPF_KERNELS_END_<unit>, i.e. the number of kernels in this unit.
 * U is switched mid-declaration so LWPF_UNIT first names the variable and
 * then its initialiser.
 */
#define U(x) lwpf_kernel_count_##x
LWPF_WEAK extern const long LWPF_UNIT =
#undef U
#define U(x) LWPF_KERNELS_END_##x
    LWPF_UNIT;
#undef U
/*
 * lwpf_evt_config_<unit>: weak, .data-resident event configuration for this
 * unit. It is the target of the pe_put() in lwpf_init_<unit>() and its .evt
 * member is passed to config_pcrs() by lwpf_enter().
 */
#define U(x) lwpf_evt_config_##x
LWPF_WEAK_DATA evt_conf_t LWPF_UNIT;
#undef U
/*
 * lwpf_global_counter_<unit>[64][LWPF_KERNELS_END_<unit>][NPCR]: weak,
 * .data-resident counter storage. The first index is selected with _MYID by
 * lwpf_enter()/lwpf_exit(); the second is the kernel id.
 * NOTE(review): 64 is presumably the number of CPEs per core group, and the
 * DMA helpers move sizeof(LWPF_PCVEC) bytes per kernel into a row of NPCR
 * longs, which assumes sizeof(LWPF_PCVEC) == NPCR * sizeof(long) - confirm.
 * U is intentionally left defined as LWPF_KERNELS_END_##x after this
 * declaration; the lwpf_local_counter definition that follows relies on it.
 */
#define U(x) lwpf_global_counter_##x
LWPF_WEAK_DATA long LWPF_UNIT[64][
#undef U
#define U(x) LWPF_KERNELS_END_##x
    LWPF_UNIT][NPCR];


/*
 * Definition of the counter array indexed by kernel id (see lwpf_start(),
 * lwpf_stop() and the lwpf_sync_counters_* helpers).
 * U is still defined as LWPF_KERNELS_END_##x at this point, so LWPF_UNIT
 * expands to the number of kernels in the current unit.
 */
#ifdef __sw7__
/*
 * A compiler bug makes the weak attribute unusable when the array bound is
 * the per-unit kernel count, so a fixed capacity is used instead (unstable
 * workaround).
 */
#define LWPF_SW7_LOCAL_COUNTER_CAP 16
LWPF_WEAK __thread_local LWPF_PCVEC lwpf_local_counter[LWPF_SW7_LOCAL_COUNTER_CAP];
/*
 * Compile-time guard: the init loop, lwpf_start()/lwpf_stop() and the DMA
 * helpers all touch LWPF_KERNELS_END_<unit> entries, so a unit with more
 * kernels than the fixed capacity would overrun the array. The array bound
 * below becomes negative (a compile error) in that case. An extern
 * declaration is used because it emits no object and may be repeated.
 */
extern char lwpf_sw7_local_counter_fits[(LWPF_UNIT <= LWPF_SW7_LOCAL_COUNTER_CAP) ? 1 : -1];
#else
LWPF_WEAK __thread_local LWPF_PCVEC lwpf_local_counter[LWPF_UNIT];
#endif
#undef U

/*
 * lwpf_init_<unit>(conf): set up profiling state for this unit.
 *
 * conf: event configuration to use, or NULL to build one from the
 *       compile-time EVT_PC0..EVT_PC7 macros.
 *
 * Steps, in order:
 *   1. Build a local evt_conf_t (lconf) from conf or from the EVT_PCn macros.
 *   2. If _MYID == 0, pe_put() lconf and lwpf_evt_config_<unit>
 *      (sizeof(evt_conf_t) bytes) and wait for completion.
 *   3. Set every lwpf_local_counter[0 .. LWPF_KERNELS_END_<unit>) to v0.
 *   4. Run lwpf_exit(<unit>), which calls lwpf_sync_counters_c2m() for this
 *      CPE's row of lwpf_global_counter_<unit>.
 *
 * Declared weak, and with C linkage when compiled as C++.
 */
#define U(x) lwpf_init_##x
#ifdef __cplusplus
extern "C"
#endif
LWPF_WEAK void LWPF_UNIT(evt_conf_t *conf) {
#undef U
  int i;
  evt_conf_t lconf;
  if (!conf) {
    // From here until the matching #undef, LWPF_UNIT names
    // lwpf_evt_config_<unit>; it is used in the pe_put() below.
#define U(x) lwpf_evt_config_##x
    // Default configuration: one slot per EVT_PCn macro that is defined.
    // NOTE(review): lconf.evt[n] is left unwritten for every EVT_PCn that is
    // not defined, yet the whole struct is transferred below and
    // lwpf_enter() hands .evt (without pc_mask) to config_pcrs() - confirm
    // that unwritten slots are harmless.
    lconf.pc_mask = 0;
#ifdef EVT_PC0
    lconf.evt[0] = EVT_PC0;
    lconf.pc_mask |= MASK_PC(0);
#endif
#ifdef EVT_PC1
    lconf.evt[1] = EVT_PC1;
    lconf.pc_mask |= MASK_PC(1);
#endif
#ifdef EVT_PC2
    lconf.evt[2] = EVT_PC2;
    lconf.pc_mask |= MASK_PC(2);
#endif
#ifdef EVT_PC3
    lconf.evt[3] = EVT_PC3;
    lconf.pc_mask |= MASK_PC(3);
#endif
#ifdef EVT_PC4
    lconf.evt[4] = EVT_PC4;
    lconf.pc_mask |= MASK_PC(4);
#endif
#ifdef EVT_PC5
    lconf.evt[5] = EVT_PC5;
    lconf.pc_mask |= MASK_PC(5);
#endif
#ifdef EVT_PC6
    lconf.evt[6] = EVT_PC6;
    lconf.pc_mask |= MASK_PC(6);
#endif
#ifdef EVT_PC7
    lconf.evt[7] = EVT_PC7;
    lconf.pc_mask |= MASK_PC(7);
#endif
  } else {
    // Caller-supplied configuration: copy the NPCR event slots and the mask.
    for (i = 0; i < NPCR; i++)
      lconf.evt[i] = conf->evt[i];
    lconf.pc_mask = conf->pc_mask;
  }
  // Only the CPE with _MYID == 0 performs the configuration transfer.
  if (_MYID == 0) {
    dma_init();
    pe_put(&(LWPF_UNIT), &lconf, sizeof(evt_conf_t));
    dma_syn();
  }
#undef U
#define U(x) LWPF_KERNELS_END_##x
  // v0 is produced by "vcpys $31, $31, %0" and copied into every counter
  // slot; presumably $31 reads as zero so this clears the counters - confirm
  // against the ISA.
  LWPF_PCVEC v0;
  asm("vcpys $31, $31, %0"
      : "=r"(v0));
  for (i = 0; i < LWPF_UNIT; i++) {
    lwpf_local_counter[i] = v0;
  }
#undef U
  // LWPF_UNIT now expands to "lwpf_exit(<unit>);", transferring the freshly
  // initialised local counters via lwpf_sync_counters_c2m().
#define U(x) lwpf_exit(x);
  LWPF_UNIT
#undef U
}
#endif
